import json
from docx import Document

# Input word list (.docx) and the JSON file generated from it.
SOURCE_DOCX = r"E:/初中英语单词表大全.docx"
TARGET_JSON = 'E:/初中英语单词表大全.json'

# Part-of-speech tags, tested in this order; the first tag found wins.
# Order matters: "adv" must be tried before "v", and "prep" before "pron".
PART_TAGS = ("adj", "adv", "prep", "conj", "phr.", "num", "pron", "v")
# Tag used when none of PART_TAGS occurs in the text after the phonetics.
DEFAULT_PART = "n"


def parse_entry(text):
    """Split one paragraph of the word list into its fields.

    The text before the first '[' is taken as the English word, the span
    from the first '[' through the first ']' as the phonetic symbol, and
    everything after that ']' as part of speech plus Chinese meaning.

    Args:
        text: The plain text of a single paragraph.

    Returns:
        A dict with the keys "english", "symbol", "part" and "chinese",
        or None when the paragraph contains no '[' and is therefore not
        treated as a word entry.
    """
    start = text.find('[')
    if start == -1:
        return None

    # NOTE(review): a paragraph with '[' but no ']' gives end == -1, which
    # yields an empty symbol and the whole paragraph as the remainder.
    end = text.find(']')
    english = text[:start]
    symbol = text[start:end + 1]
    remainder = text[end + 1:]

    part = next((tag for tag in PART_TAGS if tag in remainder), DEFAULT_PART)

    # NOTE(review): the tag is removed wherever it occurs in the remainder,
    # and punctuation following it (e.g. the '.' in "adj.") is kept in the
    # Chinese text. Confirm against the source document before changing.
    chinese = remainder.replace(part, "").strip()

    return {"english": english.strip(),
            "symbol": symbol,
            "part": part,
            "chinese": chinese}


def build_entries(texts):
    """Return the parsed entries for every paragraph text that is a word entry.

    Args:
        texts: An iterable of paragraph strings.

    Returns:
        A list of entry dicts in input order; paragraphs for which
        parse_entry returns None are skipped.
    """
    return [entry for entry in map(parse_entry, texts) if entry is not None]


def main():
    """Convert the word-list document at SOURCE_DOCX into TARGET_JSON."""
    document = Document(SOURCE_DOCX)
    entries = build_entries(paragraph.text for paragraph in document.paragraphs)

    # Serialise the whole list in one call so the separators are always
    # valid JSON, even when the last paragraph of the document is not a
    # word entry. The file is opened only after parsing succeeded and is
    # closed by the context manager.
    with open(TARGET_JSON, 'w', encoding='utf-8') as out:
        json.dump(entries, out, indent=4, ensure_ascii=False)


if __name__ == "__main__":
    main()


